#from pandas_profiling import ProfileReport
import pandas as pd
import numpy as np
from numpy import ravel
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer
from sklearn.compose import ColumnTransformer
from category_encoders import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.dummy import DummyRegressor
from skopt.optimizer import dummy_minimize
import plotly.express as px
import plotly.graph_objects as go
from lightgbm import LGBMRegressor
# Dataset locations and the global random seed used throughout the script.
TRAIN_PATH = r'C:\Users\heylu\Dropbox\House Prices - Advanced Regression Techniques\dataset\train.csv'
TEST_PATH = r'C:\Users\heylu\Dropbox\House Prices - Advanced Regression Techniques\dataset\test.csv'
SEED = 4

# Load the Kaggle House Prices data; the Id column carries no predictive
# signal for training, so drop it from the training frame only (the test
# Id is still needed for the submission file).
df_train = pd.read_csv(TRAIN_PATH)
df_test = pd.read_csv(TEST_PATH)
df_train = df_train.drop(columns='Id')
#profile = ProfileReport(df_train, minimal=True)
#profile.to_file(output_file='df_train_report.html')
df_train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1460 entries, 0 to 1459 Data columns (total 80 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 MSSubClass 1460 non-null int64 1 MSZoning 1460 non-null object 2 LotFrontage 1201 non-null float64 3 LotArea 1460 non-null int64 4 Street 1460 non-null object 5 Alley 91 non-null object 6 LotShape 1460 non-null object 7 LandContour 1460 non-null object 8 Utilities 1460 non-null object 9 LotConfig 1460 non-null object 10 LandSlope 1460 non-null object 11 Neighborhood 1460 non-null object 12 Condition1 1460 non-null object 13 Condition2 1460 non-null object 14 BldgType 1460 non-null object 15 HouseStyle 1460 non-null object 16 OverallQual 1460 non-null int64 17 OverallCond 1460 non-null int64 18 YearBuilt 1460 non-null int64 19 YearRemodAdd 1460 non-null int64 20 RoofStyle 1460 non-null object 21 RoofMatl 1460 non-null object 22 Exterior1st 1460 non-null object 23 Exterior2nd 1460 non-null object 24 MasVnrType 1452 non-null object 25 MasVnrArea 1452 non-null float64 26 ExterQual 1460 non-null object 27 ExterCond 1460 non-null object 28 Foundation 1460 non-null object 29 BsmtQual 1423 non-null object 30 BsmtCond 1423 non-null object 31 BsmtExposure 1422 non-null object 32 BsmtFinType1 1423 non-null object 33 BsmtFinSF1 1460 non-null int64 34 BsmtFinType2 1422 non-null object 35 BsmtFinSF2 1460 non-null int64 36 BsmtUnfSF 1460 non-null int64 37 TotalBsmtSF 1460 non-null int64 38 Heating 1460 non-null object 39 HeatingQC 1460 non-null object 40 CentralAir 1460 non-null object 41 Electrical 1459 non-null object 42 1stFlrSF 1460 non-null int64 43 2ndFlrSF 1460 non-null int64 44 LowQualFinSF 1460 non-null int64 45 GrLivArea 1460 non-null int64 46 BsmtFullBath 1460 non-null int64 47 BsmtHalfBath 1460 non-null int64 48 FullBath 1460 non-null int64 49 HalfBath 1460 non-null int64 50 BedroomAbvGr 1460 non-null int64 51 KitchenAbvGr 1460 non-null int64 52 KitchenQual 1460 non-null object 53 TotRmsAbvGrd 1460 
non-null int64 54 Functional 1460 non-null object 55 Fireplaces 1460 non-null int64 56 FireplaceQu 770 non-null object 57 GarageType 1379 non-null object 58 GarageYrBlt 1379 non-null float64 59 GarageFinish 1379 non-null object 60 GarageCars 1460 non-null int64 61 GarageArea 1460 non-null int64 62 GarageQual 1379 non-null object 63 GarageCond 1379 non-null object 64 PavedDrive 1460 non-null object 65 WoodDeckSF 1460 non-null int64 66 OpenPorchSF 1460 non-null int64 67 EnclosedPorch 1460 non-null int64 68 3SsnPorch 1460 non-null int64 69 ScreenPorch 1460 non-null int64 70 PoolArea 1460 non-null int64 71 PoolQC 7 non-null object 72 Fence 281 non-null object 73 MiscFeature 54 non-null object 74 MiscVal 1460 non-null int64 75 MoSold 1460 non-null int64 76 YrSold 1460 non-null int64 77 SaleType 1460 non-null object 78 SaleCondition 1460 non-null object 79 SalePrice 1460 non-null int64 dtypes: float64(3), int64(34), object(43) memory usage: 912.6+ KB
Dados Faltantes de Dataset de Treino
# Show the missing-value count for every training column, without pandas
# truncating the output.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_train.isna().sum())
MSSubClass 0 MSZoning 0 LotFrontage 259 LotArea 0 Street 0 Alley 1369 LotShape 0 LandContour 0 Utilities 0 LotConfig 0 LandSlope 0 Neighborhood 0 Condition1 0 Condition2 0 BldgType 0 HouseStyle 0 OverallQual 0 OverallCond 0 YearBuilt 0 YearRemodAdd 0 RoofStyle 0 RoofMatl 0 Exterior1st 0 Exterior2nd 0 MasVnrType 8 MasVnrArea 8 ExterQual 0 ExterCond 0 Foundation 0 BsmtQual 37 BsmtCond 37 BsmtExposure 38 BsmtFinType1 37 BsmtFinSF1 0 BsmtFinType2 38 BsmtFinSF2 0 BsmtUnfSF 0 TotalBsmtSF 0 Heating 0 HeatingQC 0 CentralAir 0 Electrical 1 1stFlrSF 0 2ndFlrSF 0 LowQualFinSF 0 GrLivArea 0 BsmtFullBath 0 BsmtHalfBath 0 FullBath 0 HalfBath 0 BedroomAbvGr 0 KitchenAbvGr 0 KitchenQual 0 TotRmsAbvGrd 0 Functional 0 Fireplaces 0 FireplaceQu 690 GarageType 81 GarageYrBlt 81 GarageFinish 81 GarageCars 0 GarageArea 0 GarageQual 81 GarageCond 81 PavedDrive 0 WoodDeckSF 0 OpenPorchSF 0 EnclosedPorch 0 3SsnPorch 0 ScreenPorch 0 PoolArea 0 PoolQC 1453 Fence 1179 MiscFeature 1406 MiscVal 0 MoSold 0 YrSold 0 SaleType 0 SaleCondition 0 SalePrice 0 dtype: int64
Dados Faltantes de Dataset de Teste
# Same missing-value report for the test set.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_test.isna().sum())
Id 0 MSSubClass 0 MSZoning 4 LotFrontage 227 LotArea 0 Street 0 Alley 1352 LotShape 0 LandContour 0 Utilities 2 LotConfig 0 LandSlope 0 Neighborhood 0 Condition1 0 Condition2 0 BldgType 0 HouseStyle 0 OverallQual 0 OverallCond 0 YearBuilt 0 YearRemodAdd 0 RoofStyle 0 RoofMatl 0 Exterior1st 1 Exterior2nd 1 MasVnrType 16 MasVnrArea 15 ExterQual 0 ExterCond 0 Foundation 0 BsmtQual 44 BsmtCond 45 BsmtExposure 44 BsmtFinType1 42 BsmtFinSF1 1 BsmtFinType2 42 BsmtFinSF2 1 BsmtUnfSF 1 TotalBsmtSF 1 Heating 0 HeatingQC 0 CentralAir 0 Electrical 0 1stFlrSF 0 2ndFlrSF 0 LowQualFinSF 0 GrLivArea 0 BsmtFullBath 2 BsmtHalfBath 2 FullBath 0 HalfBath 0 BedroomAbvGr 0 KitchenAbvGr 0 KitchenQual 1 TotRmsAbvGrd 0 Functional 2 Fireplaces 0 FireplaceQu 730 GarageType 76 GarageYrBlt 78 GarageFinish 78 GarageCars 1 GarageArea 1 GarageQual 78 GarageCond 78 PavedDrive 0 WoodDeckSF 0 OpenPorchSF 0 EnclosedPorch 0 3SsnPorch 0 ScreenPorch 0 PoolArea 0 PoolQC 1456 Fence 1169 MiscFeature 1408 MiscVal 0 MoSold 0 YrSold 0 SaleType 1 SaleCondition 0 dtype: int64
# Box plot of the target to eyeball the price distribution and spot outliers.
fig = px.box(df_train['SalePrice'],
             labels={'value': 'Preço', 'variable': 'Variável'})
fig.update_layout(title='Distribuição de Preço', width=350, hovermode="x")
# Heat-map of feature pairs with correlation >= 0.90 — candidates for
# dropping redundant features. Compute the correlation matrix once instead
# of twice (the original called df_train.corr() two times).
# NOTE(review): on pandas >= 2.0 DataFrame.corr() raises on object columns;
# pass numeric_only=True there. Kept as-is for the pandas version in use.
corr = df_train.corr()
fig = px.imshow(corr[corr >= 0.90])
fig.update_layout(title = 'Correlação de Features com correlação maior igual a 0,90')
# Number of training rows at or below the 340k outlier cut-off.
display(f"Dados de Treino: {(df_train['SalePrice'] <= 340000).sum()}")
'Dados de Treino: 1399'
# Number of rows above the cut-off that will be discarded as outliers.
display(f"Outliers: {(df_train['SalePrice'] > 340000).sum()}")
'Outliers: 61'
# Remove the price outliers, then confirm none remain (expression below
# displays an empty frame in the notebook).
df_train = df_train[df_train['SalePrice'] <= 340000]
df_train[df_train['SalePrice'] > 340000]
| MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | LotConfig | ... | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice |
|---|
0 rows × 80 columns
# Encode CentralAir as a binary flag (N -> 0, Y -> 1) in both datasets.
for frame in (df_test, df_train):
    frame['CentralAir'] = frame['CentralAir'].map({'N': 0, 'Y': 1})
# --- Training-set imputation ---------------------------------------------
# NOTE(review): a KNNImputer fitted on a single column has no neighbour
# features to work with, so this is effectively mean imputation — consider
# fitting on several numeric columns instead.
imputer = KNNImputer(missing_values=np.nan)
df_train['LotFrontage'] = imputer.fit_transform(df_train[['LotFrontage']]).ravel()

# Categorical columns where NaN means "feature absent": fill with 'NA'.
# (Column-wise assignment instead of chained inplace fillna, which is
# deprecated and unreliable on newer pandas.)
df_train['Alley'] = df_train['Alley'].fillna('NA')
df_train['MasVnrArea'] = df_train['MasVnrArea'].fillna(0)
df_train['MasVnrType'] = df_train['MasVnrType'].fillna('None')
bsmt_cat = ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']
df_train[bsmt_cat] = df_train[bsmt_cat].fillna('NA')
# BUG FIX: BsmtFinSF1 is numeric — fill with 0, not the string 'NA'
# (no missing values in train, so the result is unchanged, but the string
# fill would corrupt the dtype if a NaN ever appeared).
df_train['BsmtFinSF1'] = df_train['BsmtFinSF1'].fillna(0)
df_train['FireplaceQu'] = df_train['FireplaceQu'].fillna('NA')
# Garage columns: categorical -> 'NA', numeric -> 0. The original filled
# GarageYrBlt/GarageCars/GarageArea with the string 'NA', silently turning
# numeric columns into mixed-type object columns.
garage_cat = ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']
garage_num = ['GarageYrBlt', 'GarageCars', 'GarageArea']
df_train[garage_cat] = df_train[garage_cat].fillna('NA')
df_train[garage_num] = df_train[garage_num].fillna(0)
df_train[['PoolQC', 'Fence', 'MiscFeature']] = df_train[['PoolQC', 'Fence', 'MiscFeature']].fillna('NA')
# Drop the handful of rows that still contain NaN (e.g. Electrical).
df_train.dropna(inplace=True)
# --- Test-set imputation (mirrors the training-set treatment) ------------
# NOTE(review): refitting the imputer on the test set leaks test statistics;
# ideally reuse the imputer fitted on the training data.
imputer = KNNImputer(missing_values=np.nan)
df_test['LotFrontage'] = imputer.fit_transform(df_test[['LotFrontage']]).ravel()

df_test['Alley'] = df_test['Alley'].fillna('NA')
# BUG FIX: the original filled MasVnrArea/MasVnrType from *df_train*,
# silently overwriting test values with mis-aligned training data.
df_test['MasVnrArea'] = df_test['MasVnrArea'].fillna(0)
df_test['MasVnrType'] = df_test['MasVnrType'].fillna('None')
df_test['BsmtFinSF1'] = df_test['BsmtFinSF1'].fillna(0)
bsmt_cat = ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']
df_test[bsmt_cat] = df_test[bsmt_cat].fillna('NA')
df_test['FireplaceQu'] = df_test['FireplaceQu'].fillna('NA')
# BUG FIX: the original assigned df_train's garage columns into df_test.
# Missing garage => categorical 'NA', numeric 0 (keeps dtypes consistent).
garage_cat = ['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']
garage_num = ['GarageYrBlt', 'GarageCars', 'GarageArea']
df_test[garage_cat] = df_test[garage_cat].fillna('NA')
df_test[garage_num] = df_test[garage_num].fillna(0)
df_test[['PoolQC', 'Fence', 'MiscFeature']] = df_test[['PoolQC', 'Fence', 'MiscFeature']].fillna('NA')
zero_cols = ['BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'TotalBsmtSF', 'BsmtUnfSF', 'BsmtFinSF2']
df_test[zero_cols] = df_test[zero_cols].fillna(0)
# NOTE(review): dropping test rows loses submission entries — the remaining
# NaNs (MSZoning, Utilities, KitchenQual, ...) would be better imputed.
df_test.dropna(inplace=True)
# Separate predictors from the target, then one-hot encode the categorical
# columns (use_cat_names keeps readable column names like 'MSZoning_RL').
labels = df_train['SalePrice']
features = df_train.drop(columns='SalePrice')
ohe = OneHotEncoder(use_cat_names=True)
features_ohe = ohe.fit_transform(features)
C:\Users\heylu\miniconda3\lib\site-packages\category_encoders\utils.py:21: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead
# BUG FIX (data leakage): the original fitted the MinMaxScaler on the full
# dataset *before* splitting, so test-fold statistics leaked into training.
# Split first, fit the scaler on the training split only, then transform
# the hold-out split with the same fitted scaler.
X_train, X_test, y_train, y_test = train_test_split(features_ohe, labels, random_state=SEED, test_size=0.25)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Baseline: a mean-predicting dummy regressor to benchmark the real model.
dummy = DummyRegressor()
dummy.fit(X_train, y_train)
y_pred = dummy.predict(X_test)
display(f'R² de Modelo Dummy: {round(r2_score(y_test, y_pred)*100,2)}%')
'R² de Modelo Dummy: -0.13%'
def treinar_modelo(params):
    """Fit an LGBM regressor with the given hyper-parameters and return
    the negated R² on the hold-out split (negated so the optimizer can
    minimize it)."""
    SEED = 4
    # params arrive in the same order as the search space definition.
    learning_rate, num_leaves, min_child_samples, subsample, colsample_bytree, n_estimators = params
    print(params, '\n')
    mdl = LGBMRegressor(
        random_state=SEED,
        learning_rate=learning_rate,
        num_leaves=num_leaves,
        min_child_samples=min_child_samples,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        subsample_freq=1,
        n_estimators=n_estimators,
    )
    mdl.fit(X_train, y_train)
    return -r2_score(y_test, mdl.predict(X_test))
# Hyper-parameter search space, in the order treinar_modelo unpacks it.
space = [
    (1e-3, 1e-1, 'log-uniform'),  # learning_rate
    (2, 128),                     # num_leaves
    (1, 100),                     # min_child_samples
    (0.05, 1.0),                  # subsample
    (0.1, 1.0),                   # colsample_bytree
    (100, 1000),                  # n_estimators
]
# dummy_minimize = pure random search over the space (30 evaluations).
result = dummy_minimize(treinar_modelo, space, random_state=SEED, n_calls=30, verbose=1)
Iteration No: 1 started. Evaluating function at random point. [0.06327656730105531, 71, 2, 0.8628399009188652, 0.6481320413049753, 493] Iteration No: 1 ended. Evaluation done at random point. Time taken: 1.0811 Function value obtained: -0.9084 Current minimum: -0.9084 Iteration No: 2 started. Evaluating function at random point. [0.0027050730096210155, 105, 95, 0.7134992241139206, 0.9128606713660984, 776] Iteration No: 2 ended. Evaluation done at random point. Time taken: 0.1656 Function value obtained: -0.7942 Current minimum: -0.9084 Iteration No: 3 started. Evaluating function at random point. [0.03620477422893411, 40, 53, 0.9842306433165472, 0.24745801726422886, 249] Iteration No: 3 ended. Evaluation done at random point. Time taken: 0.1117 Function value obtained: -0.9076 Current minimum: -0.9084 Iteration No: 4 started. Evaluating function at random point. [0.042062540164342105, 127, 57, 0.0919520550349246, 0.9609876709428125, 748] Iteration No: 4 ended. Evaluation done at random point. Time taken: 0.0788 Function value obtained: 3.1347 Current minimum: -0.9084 Iteration No: 5 started. Evaluating function at random point. [0.0010915647147779802, 60, 88, 0.947753658689371, 0.5017142776835626, 261] Iteration No: 5 ended. Evaluation done at random point. Time taken: 0.0848 Function value obtained: -0.2944 Current minimum: -0.9084 Iteration No: 6 started. Evaluating function at random point. [0.0013533779189430324, 111, 29, 0.872588621800476, 0.8591753055721483, 710] Iteration No: 6 ended. Evaluation done at random point. Time taken: 0.4418 Function value obtained: -0.7044 Current minimum: -0.9084 Iteration No: 7 started. Evaluating function at random point. [0.011371497149642092, 52, 18, 0.2979234721719005, 0.13218236522675356, 287] Iteration No: 7 ended. Evaluation done at random point. Time taken: 0.1127 Function value obtained: -0.8464 Current minimum: -0.9084 Iteration No: 8 started. Evaluating function at random point. 
[0.012314174496234818, 25, 95, 0.35031040595040225, 0.31693474266547406, 431] Iteration No: 8 ended. Evaluation done at random point. Time taken: 0.1077 Function value obtained: -0.8093 Current minimum: -0.9084 Iteration No: 9 started. Evaluating function at random point. [0.0062201377958643915, 116, 71, 0.7790347187356178, 0.2610765190370178, 566] Iteration No: 9 ended. Evaluation done at random point. Time taken: 0.1446 Function value obtained: -0.8584 Current minimum: -0.9084 Iteration No: 10 started. Evaluating function at random point. [0.010117334727109762, 117, 86, 0.1290106373057054, 0.32761865155725006, 159] Iteration No: 10 ended. Evaluation done at random point. Time taken: 0.0229 Function value obtained: 0.0013 Current minimum: -0.9084 Iteration No: 11 started. Evaluating function at random point. [0.06180197188255592, 10, 38, 0.8203596896072781, 0.213563492506852, 666] Iteration No: 11 ended. Evaluation done at random point. Time taken: 0.2184 Function value obtained: -0.9125 Current minimum: -0.9125 Iteration No: 12 started. Evaluating function at random point. [0.0023168120809679477, 32, 5, 0.4602309677741663, 0.8471692918885118, 643] Iteration No: 12 ended. Evaluation done at random point. Time taken: 0.6333 Function value obtained: -0.8208 Current minimum: -0.9125 Iteration No: 13 started. Evaluating function at random point. [0.024907561965918448, 80, 10, 0.9093472835765465, 0.43877752657923763, 726] Iteration No: 13 ended. Evaluation done at random point. Time taken: 1.0063 Function value obtained: -0.9121 Current minimum: -0.9125 Iteration No: 14 started. Evaluating function at random point. [0.049078049938118595, 6, 40, 0.6159037112149178, 0.17619544991836925, 836] Iteration No: 14 ended. Evaluation done at random point. Time taken: 0.1695 Function value obtained: -0.9115 Current minimum: -0.9125 Iteration No: 15 started. Evaluating function at random point. 
[0.001228275132159861, 55, 23, 0.256558778136363, 0.8263148930911763, 127] Iteration No: 15 ended. Evaluation done at random point. Time taken: 0.0559 Function value obtained: -0.1862 Current minimum: -0.9125 Iteration No: 16 started. Evaluating function at random point. [0.01611116779294787, 45, 87, 0.5630709931606661, 0.21626810530339394, 494] Iteration No: 16 ended. Evaluation done at random point. Time taken: 0.1117 Function value obtained: -0.8635 Current minimum: -0.9125 Iteration No: 17 started. Evaluating function at random point. [0.07956266498142855, 119, 8, 0.09610742959857489, 0.19883630630605964, 415] Iteration No: 17 ended. Evaluation done at random point. Time taken: 0.1685 Function value obtained: -0.8797 Current minimum: -0.9125 Iteration No: 18 started. Evaluating function at random point. [0.04637640720810472, 66, 88, 0.17248856910706784, 0.8480218897682718, 988] Iteration No: 18 ended. Evaluation done at random point. Time taken: 0.1297 Function value obtained: 6.9649 Current minimum: -0.9125 Iteration No: 19 started. Evaluating function at random point. [0.04171467307105799, 33, 36, 0.9404269042132899, 0.31253649990151944, 475] Iteration No: 19 ended. Evaluation done at random point. Time taken: 0.2912 Function value obtained: -0.9153 Current minimum: -0.9153 Iteration No: 20 started. Evaluating function at random point. [0.0053592788022858405, 82, 8, 0.3827709503127399, 0.27012318351053943, 570] Iteration No: 20 ended. Evaluation done at random point. Time taken: 0.4707 Function value obtained: -0.8901 Current minimum: -0.9153 Iteration No: 21 started. Evaluating function at random point. [0.004377551755279261, 43, 4, 0.5681080692628621, 0.43399556156121155, 619] Iteration No: 21 ended. Evaluation done at random point. Time taken: 0.7929 Function value obtained: -0.8924 Current minimum: -0.9153 Iteration No: 22 started. Evaluating function at random point. 
[0.006682583950549282, 30, 76, 0.907300763082334, 0.981654635836437, 381] Iteration No: 22 ended. Evaluation done at random point. Time taken: 0.0987 Function value obtained: -0.8513 Current minimum: -0.9153 Iteration No: 23 started. Evaluating function at random point. [0.05670034642877683, 7, 74, 0.5275156717356378, 0.7727453339354737, 648] Iteration No: 23 ended. Evaluation done at random point. Time taken: 0.1217 Function value obtained: -0.8961 Current minimum: -0.9153 Iteration No: 24 started. Evaluating function at random point. [0.001018220371397086, 21, 81, 0.18818921124796484, 0.3779151073364658, 560] Iteration No: 24 ended. Evaluation done at random point. Time taken: 0.0918 Function value obtained: -0.2883 Current minimum: -0.9153 Iteration No: 25 started. Evaluating function at random point. [0.03880133204736965, 78, 77, 0.9466076305888934, 0.2564018143797483, 739] Iteration No: 25 ended. Evaluation done at random point. Time taken: 0.2094 Function value obtained: -0.9031 Current minimum: -0.9153 Iteration No: 26 started. Evaluating function at random point. [0.002458155884597445, 53, 63, 0.05990534212573755, 0.3069072126082252, 689] Iteration No: 26 ended. Evaluation done at random point. Time taken: 0.0748 Function value obtained: 0.0013 Current minimum: -0.9153 Iteration No: 27 started. Evaluating function at random point. [0.08842111523458891, 106, 4, 0.06517271365498266, 0.5889612835211807, 126] Iteration No: 27 ended. Evaluation done at random point. Time taken: 0.0728 Function value obtained: -0.8805 Current minimum: -0.9153 Iteration No: 28 started. Evaluating function at random point. [0.0643803371849957, 83, 58, 0.5287059364413743, 0.6533375020714426, 307] Iteration No: 28 ended. Evaluation done at random point. Time taken: 0.0898 Function value obtained: -0.8993 Current minimum: -0.9153 Iteration No: 29 started. Evaluating function at random point. 
[0.07931181342100058, 36, 29, 0.5171982537534936, 0.9382728395501004, 358] Iteration No: 29 ended. Evaluation done at random point. Time taken: 0.1606 Function value obtained: -0.9078 Current minimum: -0.9153 Iteration No: 30 started. Evaluating function at random point. [0.003767676913287042, 107, 22, 0.7924545677028243, 0.41873098436142886, 813] Iteration No: 30 ended. Evaluation done at random point. Time taken: 0.5615 Function value obtained: -0.8911 Current minimum: -0.9153
# Rebuild the model with the best hyper-parameters found by the search.
learning_rate, num_leaves, min_child_samples, subsample, colsample_bytree, n_estimators = result.x
model = LGBMRegressor(
    random_state=SEED,
    learning_rate=learning_rate,
    num_leaves=num_leaves,
    min_child_samples=min_child_samples,
    subsample=subsample,
    colsample_bytree=colsample_bytree,
    subsample_freq=1,
    n_estimators=n_estimators,
)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 10-fold cross-validation on the training split for a more robust R² estimate.
kfold = KFold(n_splits=10, shuffle=True, random_state=SEED)
results = cross_validate(model, X_train, y_train, cv=kfold, scoring='r2', return_train_score=False)
cv = round(results['test_score'].mean(), 2)
display(f'Taxa Média de Acerto de Modelo Light GBM: {cv * 100:.2f} %')
'Taxa Média de Acerto de Modelo Light GBM: 89.00 %'
# Overlay the model's predictions on the actual hold-out prices.
fig = go.Figure(data=[
    go.Scatter(y=y_test, name='Valor Atual'),
    go.Scatter(y=y_pred, name='Predição', mode='markers'),
])
fig.update_layout(hovermode='x', title='Relação Predição e Valores Atuais', width=1600, height=800)
# --- Final model: train on all cleaned training data, predict the test set ---
X_train = df_train.drop('SalePrice', axis=1)
y_train = df_train['SalePrice']

# BUG FIX: fit the encoder on train only and *transform* the test set with
# it, so both matrices share exactly the same one-hot columns. The original
# refit the encoder on the test set and then papered over the resulting
# column mismatch by dropping train-only columns.
ohe = OneHotEncoder(use_cat_names=True)
ohe_train = ohe.fit_transform(X_train)
ohe_test = ohe.transform(df_test.drop(columns='Id'))

# BUG FIX: likewise, scale the test set with the scaler fitted on train —
# refitting on test leaks test statistics and shifts the feature ranges.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(ohe_train)
test_scaled = scaler.transform(ohe_test)

model = LGBMRegressor(
    random_state=SEED,
    learning_rate=learning_rate,
    num_leaves=num_leaves,
    min_child_samples=min_child_samples,
    subsample=subsample,
    colsample_bytree=colsample_bytree,
    subsample_freq=1,
    n_estimators=n_estimators,
)
model.fit(train_scaled, y_train)
y_pred = model.predict(test_scaled)

# Persist predictions alongside the original test rows.
df_test['PredictedPrices'] = y_pred.round(2)
df_test.to_csv(path_or_buf=r'C:\Users\heylu\Dropbox\House Prices - Advanced Regression Techniques\csv_results\test_com_previsões.csv', index=False)